# coding:utf-8

from numpy import *
import operator


def create_data_set():
    """Build the small hard-coded sample set used to try out the classifier.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 ``numpy.ndarray`` of
        sample points and ``labels`` is a list holding the class label of
        each row, in the same order.
    """
    labels = ['A', 'A', 'B', 'B']
    points = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    return array(points), labels


def classify0(in_x, data_set, labels, k):
    """Classify ``in_x`` by majority vote among its ``k`` nearest neighbours.

    The distance between ``in_x`` and every row of ``data_set`` is the
    Euclidean distance.

    Args:
        in_x: Feature vector to classify (a sequence or array with one value
            per column of ``data_set``).
        data_set: 2-D ``numpy.ndarray`` with one training sample per row.
        labels: Sequence of class labels; ``labels[i]`` is the label of row
            ``i`` of ``data_set``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes. On a tie, the label that was
        met first while walking the neighbours from nearest to farthest wins
        (stable sort over an insertion-ordered dict).

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``data_set``.
    """
    # shape is a tuple (rows, columns); the row count is the sample count.
    data_set_size = data_set.shape[0]
    if not 1 <= k <= data_set_size:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (data_set_size, k)
        )

    # Broadcasting subtracts every row of data_set from in_x, so there is no
    # need to materialise a repeated copy of in_x with tile().
    diff_mat = asarray(in_x) - data_set
    sq_distances = (diff_mat ** 2).sum(axis=1)
    distances = sq_distances ** 0.5

    # Indices of the training samples ordered from nearest to farthest.
    sorted_dist_indices = distances.argsort()

    class_count = {}
    for neighbour_index in sorted_dist_indices[:k]:
        vote_label = labels[neighbour_index]
        class_count[vote_label] = class_count.get(vote_label, 0) + 1

    # Highest vote count first; sorted() is stable, so ties keep vote order.
    sorted_class_count = sorted(
        class_count.items(), key=operator.itemgetter(1), reverse=True
    )
    return sorted_class_count[0][0]


def file_to_matrix(filename):
    """Parse a tab-separated data file into a feature matrix and label list.

    Every non-blank line is split on tab characters. The first three fields
    are stored as floats in one row of the matrix and the last field is
    converted to ``int`` and used as that row's class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(return_mat, class_label_vector)`` tuple: ``return_mat`` is an
        ``n x 3`` float ``numpy.ndarray`` (``n`` = number of non-blank lines)
        and ``class_label_vector`` is a list of ``n`` integer labels.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature field is not a valid float or the last
            field is not a valid integer.
    """
    # Read *all* lines (a single readline() would yield only the first line
    # as a string) and let the context manager close the file.
    with open(filename) as fr:
        stripped_lines = [line.strip() for line in fr]

    # Blank lines (e.g. a trailing newline at the end of the file) carry no
    # sample and would otherwise fail the int() conversion below.
    data_lines = [line for line in stripped_lines if line]

    return_mat = zeros((len(data_lines), 3))
    class_label_vector = []
    for index, line in enumerate(data_lines):
        data_list = line.split('\t')
        return_mat[index, :] = [float(value) for value in data_list[0:3]]
        class_label_vector.append(int(data_list[-1]))
    return return_mat, class_label_vector


if __name__ == "__main__":
    # Demo run: classify the point (0, 0) against the built-in sample set
    # using its 3 nearest neighbours. The predicted label is not printed.
    group, labels = create_data_set()
    classify0(in_x=[0, 0], data_set=group, labels=labels, k=3)
